#!/bin/bash
# ODS拉链表Clue

HIVE_HOME=/usr/bin/hive
SQOOP_HOME=/usr/bin/sqoop

# 需求：能够手动传入参数指定采集的日期。如果不传入参数要求能够自动的识别前一天的日期
if [ $# -eq 0 ]
then
    DATESTR=`date -d "-1 day" +%Y-%m-%d`
else
    DATESTR=$1
fi

# 一、重建customer_clue_update增量表，每次使用都要重建，避免数据重复问题
${HIVE_HOME} -e "
DROP TABLE IF EXISTS itcast_ods.customer_clue_update;
CREATE TABLE IF NOT EXISTS itcast_ods.customer_clue_update (
    id                       INT COMMENT 'customer_clue_id',
    create_date_time         STRING COMMENT '创建时间',
    update_date_time         STRING COMMENT '最后更新时间',
    deleted                  STRING COMMENT '是否被删除（禁用）',
    customer_id              INT COMMENT '客户id',
    customer_relationship_id INT COMMENT '客户关系id',
    session_id               STRING COMMENT '七陌会话id',
    sid                      STRING COMMENT '访客id',
    status                   STRING COMMENT '状态（undeal待领取 deal 已领取 finish 已关闭 changePeer 已流转）',
    users                    STRING COMMENT '所属坐席',
    create_time              STRING COMMENT '七陌创建时间',
    platform                 STRING COMMENT '平台来源 （pc-网站咨询|wap-wap咨询|sdk-app咨询|weixin-微信咨询）',
    s_name                   STRING COMMENT '用户名称',
    seo_source               STRING COMMENT '搜索来源',
    seo_keywords             STRING COMMENT '关键字',
    ip                       STRING COMMENT 'IP地址',
    referrer                 STRING COMMENT '上级来源页面',
    from_url                 STRING COMMENT '会话来源页面',
    landing_page_url         STRING COMMENT '访客着陆页面',
    url_title                STRING COMMENT '咨询页面title',
    to_peer                  STRING COMMENT '所属技能组',
    manual_time              STRING COMMENT '人工开始时间',
    begin_time               STRING COMMENT '坐席领取时间 ',
    reply_msg_count          INT COMMENT '客服回复消息数',
    total_msg_count          INT COMMENT '消息总数',
    msg_count                INT COMMENT '客户发送消息数',
    comment                  STRING COMMENT '备注',
    finish_reason            STRING COMMENT '结束类型',
    finish_user              STRING COMMENT '结束坐席',
    end_time                 STRING COMMENT '会话结束时间',
    platform_description     STRING COMMENT '客户平台信息',
    browser_name             STRING COMMENT '浏览器名称',
    os_info                  STRING COMMENT '系统名称',
    area                     STRING COMMENT '区域',
    country                  STRING COMMENT '所在国家',
    province                 STRING COMMENT '省',
    city                     STRING COMMENT '城市',
    creator                  INT COMMENT '创建人',
    name                     STRING COMMENT '客户姓名',
    idcard                   STRING COMMENT '身份证号',
    phone                    STRING COMMENT '手机号',
    itcast_school_id         INT COMMENT '校区Id',
    itcast_school            STRING COMMENT '校区',
    itcast_subject_id        INT COMMENT '学科Id',
    itcast_subject           STRING COMMENT '学科',
    wechat                   STRING COMMENT '微信',
    qq                       STRING COMMENT 'qq号',
    email                    STRING COMMENT '邮箱',
    gender                   STRING COMMENT '性别',
    level                    STRING COMMENT '客户级别',
    origin_type              STRING COMMENT '数据来源渠道',
    information_way          STRING COMMENT '资讯方式',
    working_years            STRING COMMENT '开始工作时间',
    technical_directions     STRING COMMENT '技术方向',
    customer_state           STRING COMMENT '当前客户状态',
    valid                    STRING COMMENT '该线索是否是网资有效线索',
    anticipat_signup_date    STRING COMMENT '预计报名时间',
    clue_state               STRING COMMENT '线索状态',
    scrm_department_id       INT COMMENT 'SCRM内部部门id',
    superior_url             STRING COMMENT '诸葛获取上级页面URL',
    superior_source          STRING COMMENT '诸葛获取上级页面URL标题',
    landing_url              STRING COMMENT '诸葛获取着陆页面URL',
    landing_source           STRING COMMENT '诸葛获取着陆页面URL来源',
    info_url                 STRING COMMENT '诸葛获取留咨页URL',
    info_source              STRING COMMENT '诸葛获取留咨页URL标题',
    origin_channel           STRING COMMENT '投放渠道',
    course_id                INT COMMENT '课程编号',
    course_name              STRING COMMENT '课程名称',
    zhuge_session_id         STRING COMMENT 'zhuge会话id',
    is_repeat                INT COMMENT '是否重复线索(手机号维度) 0:正常 1：重复',
    tenant                   INT COMMENT '租户id',
    activity_id              STRING COMMENT '活动id',
    activity_name            STRING COMMENT '活动名称',
    follow_type              INT COMMENT '分配类型，0-自动分配，1-手动分配，2-自动转移，3-手动单个转移，4-手动批量转移，5-公海领取',
    shunt_mode_id            INT COMMENT '匹配到的技能组id',
    shunt_employee_group_id  INT COMMENT '所属分流员工组',
    ends_time                STRING COMMENT '有效时间'
) COMMENT '客户关系表'
PARTITIONED BY (starts_time STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
TBLPROPERTIES ('orc.compress' = 'ZLIB');
"

# 模拟增加2条，修改1条
mysql -uroot -p123456 scrm -e "
INSERT INTO scrm.customer_clue VALUES ('1106801', '2021-01-18 10:10:10', '2021-01-18 10:10:10', '', '2', '2', null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, '451', '**', '', '186****0002', '1', '北京', null, null, null, '271****75', '', 'WOMEN', 'A', 'NETSERVICE', null, null, null, null, '', null, 'INVALID_PUBLIC_OLD_CLUE', '53', null, null, null, null, null, null, '', null, null, null, '0', '1', null, null, '0', null, null);
INSERT INTO scrm.customer_clue VALUES ('1106802', '2021-01-18 11:11:11', '2021-01-18 11:11:11', '', '2', '2', null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, '451', '**', '', '186****0002', '1', '北京', null, null, null, '271****75', '', 'WOMEN', 'A', 'NETSERVICE', null, null, null, null, '', null, 'INVALID_PUBLIC_OLD_CLUE', '53', null, null, null, null, null, null, '', null, null, null, '0', '1', null, null, '0', null, null);
UPDATE scrm.customer_clue set update_date_time = '2021-01-18 12:12:12' WHERE id = 1106800;
UPDATE scrm.customer_clue set url_title = '哪怕抛掉出身的因素，我觉得，你也未必干得过别人。' WHERE id = 1106800;
"

# 二、抽取昨天新增和更新的数据
${SQOOP_HOME} import \
--connect jdbc:mysql://192.168.10.150:3306/scrm \
--username root \
--password 123456 \
--query "
SELECT id,
       create_date_time,
       update_date_time,
       deleted,
       customer_id,
       customer_relationship_id,
       session_id,
       sid,
       status,
       user         AS users,
       create_time,
       platform,
       s_name,
       seo_source,
       seo_keywords,
       ip,
       referrer,
       from_url,
       landing_page_url,
       url_title,
       to_peer,
       manual_time,
       begin_time,
       reply_msg_count,
       total_msg_count,
       msg_count,
       comment,
       finish_reason,
       finish_user,
       end_time,
       platform_description,
       browser_name,
       os_info,
       area,
       country,
       province,
       city,
       creator,
       name,
       idcard,
       phone,
       itcast_school_id,
       itcast_school,
       itcast_subject_id,
       itcast_subject,
       wechat,
       qq,
       email,
       gender,
       level,
       origin_type,
       information_way,
       working_years,
       technical_directions,
       customer_state,
       valid,
       anticipat_signup_date,
       clue_state,
       scrm_department_id,
       superior_url,
       superior_source,
       landing_url,
       landing_source,
       info_url,
       info_source,
       origin_channel,
       course_id,
       course_name,
       zhuge_session_id,
       is_repeat,
       tenant,
       activity_id,
       activity_name,
       follow_type,
       shunt_mode_id,
       shunt_employee_group_id,
       '${DATESTR}' AS starts_time,
       '9999-12-31' AS ends_time
FROM customer_clue
WHERE (
        create_date_time >= '${DATESTR} 00:00:00'
        AND
        create_date_time <= '${DATESTR} 23:59:59')
   OR (
        update_date_time >= '${DATESTR} 00:00:00'
        AND
        update_date_time <= '${DATESTR} 23:59:59') AND \$CONDITIONS" \
--hcatalog-database itcast_ods \
--hcatalog-table customer_clue_update \
--hive-partition-key starts_time \
--hive-partition-value ${DATESTR} \
-m 1 \
--split-by id

# 三、重建customer_clue_tmp临时表
${HIVE_HOME} -e "
DROP TABLE IF EXISTS itcast_ods.customer_clue_tmp;
CREATE TABLE IF NOT EXISTS itcast_ods.customer_clue_tmp (
    id                       INT COMMENT 'customer_clue_id',
    create_date_time         STRING COMMENT '创建时间',
    update_date_time         STRING COMMENT '最后更新时间',
    deleted                  STRING COMMENT '是否被删除（禁用）',
    customer_id              INT COMMENT '客户id',
    customer_relationship_id INT COMMENT '客户关系id',
    session_id               STRING COMMENT '七陌会话id',
    sid                      STRING COMMENT '访客id',
    status                   STRING COMMENT '状态（undeal待领取 deal 已领取 finish 已关闭 changePeer 已流转）',
    users                    STRING COMMENT '所属坐席',
    create_time              STRING COMMENT '七陌创建时间',
    platform                 STRING COMMENT '平台来源 （pc-网站咨询|wap-wap咨询|sdk-app咨询|weixin-微信咨询）',
    s_name                   STRING COMMENT '用户名称',
    seo_source               STRING COMMENT '搜索来源',
    seo_keywords             STRING COMMENT '关键字',
    ip                       STRING COMMENT 'IP地址',
    referrer                 STRING COMMENT '上级来源页面',
    from_url                 STRING COMMENT '会话来源页面',
    landing_page_url         STRING COMMENT '访客着陆页面',
    url_title                STRING COMMENT '咨询页面title',
    to_peer                  STRING COMMENT '所属技能组',
    manual_time              STRING COMMENT '人工开始时间',
    begin_time               STRING COMMENT '坐席领取时间 ',
    reply_msg_count          INT COMMENT '客服回复消息数',
    total_msg_count          INT COMMENT '消息总数',
    msg_count                INT COMMENT '客户发送消息数',
    comment                  STRING COMMENT '备注',
    finish_reason            STRING COMMENT '结束类型',
    finish_user              STRING COMMENT '结束坐席',
    end_time                 STRING COMMENT '会话结束时间',
    platform_description     STRING COMMENT '客户平台信息',
    browser_name             STRING COMMENT '浏览器名称',
    os_info                  STRING COMMENT '系统名称',
    area                     STRING COMMENT '区域',
    country                  STRING COMMENT '所在国家',
    province                 STRING COMMENT '省',
    city                     STRING COMMENT '城市',
    creator                  INT COMMENT '创建人',
    name                     STRING COMMENT '客户姓名',
    idcard                   STRING COMMENT '身份证号',
    phone                    STRING COMMENT '手机号',
    itcast_school_id         INT COMMENT '校区Id',
    itcast_school            STRING COMMENT '校区',
    itcast_subject_id        INT COMMENT '学科Id',
    itcast_subject           STRING COMMENT '学科',
    wechat                   STRING COMMENT '微信',
    qq                       STRING COMMENT 'qq号',
    email                    STRING COMMENT '邮箱',
    gender                   STRING COMMENT '性别',
    level                    STRING COMMENT '客户级别',
    origin_type              STRING COMMENT '数据来源渠道',
    information_way          STRING COMMENT '资讯方式',
    working_years            STRING COMMENT '开始工作时间',
    technical_directions     STRING COMMENT '技术方向',
    customer_state           STRING COMMENT '当前客户状态',
    valid                    STRING COMMENT '该线索是否是网资有效线索',
    anticipat_signup_date    STRING COMMENT '预计报名时间',
    clue_state               STRING COMMENT '线索状态',
    scrm_department_id       INT COMMENT 'SCRM内部部门id',
    superior_url             STRING COMMENT '诸葛获取上级页面URL',
    superior_source          STRING COMMENT '诸葛获取上级页面URL标题',
    landing_url              STRING COMMENT '诸葛获取着陆页面URL',
    landing_source           STRING COMMENT '诸葛获取着陆页面URL来源',
    info_url                 STRING COMMENT '诸葛获取留咨页URL',
    info_source              STRING COMMENT '诸葛获取留咨页URL标题',
    origin_channel           STRING COMMENT '投放渠道',
    course_id                INT COMMENT '课程编号',
    course_name              STRING COMMENT '课程名称',
    zhuge_session_id         STRING COMMENT 'zhuge会话id',
    is_repeat                INT COMMENT '是否重复线索(手机号维度) 0:正常 1：重复',
    tenant                   INT COMMENT '租户id',
    activity_id              STRING COMMENT '活动id',
    activity_name            STRING COMMENT '活动名称',
    follow_type              INT COMMENT '分配类型，0-自动分配，1-手动分配，2-自动转移，3-手动单个转移，4-手动批量转移，5-公海领取',
    shunt_mode_id            INT COMMENT '匹配到的技能组id',
    shunt_employee_group_id  INT COMMENT '所属分流员工组',
    ends_time                STRING COMMENT '有效时间'
) COMMENT '客户关系表'
PARTITIONED BY (starts_time STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
TBLPROPERTIES ('orc.compress' = 'ZLIB');
"

# 四、合并增量数据与历史数据并写到tmp表中
${HIVE_HOME} -e "
--分区
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
SET hive.exec.max.dynamic.partitions.pernode=10000;
SET hive.exec.max.dynamic.partitions=100000;
SET hive.exec.max.created.files=150000;
--hive压缩
SET hive.exec.compress.intermediate=true;
SET hive.exec.compress.output=true;
--写入时压缩生效
SET hive.exec.orc.compression.strategy=COMPRESSION;
--插入数据到临时表
INSERT OVERWRITE TABLE itcast_ods.customer_clue_tmp PARTITION (starts_time)
SELECT *
FROM (
    SELECT *
      FROM itcast_ods.customer_clue_update

      UNION ALL

      SELECT rs.id,
             rs.create_date_time,
             rs.update_date_time,
             rs.deleted,
             rs.customer_id,
             rs.customer_relationship_id,
             rs.session_id,
             rs.sid,
             rs.status,
             rs.users,
             rs.create_time,
             rs.platform,
             rs.s_name,
             rs.seo_source,
             rs.seo_keywords,
             rs.ip,
             rs.referrer,
             rs.from_url,
             rs.landing_page_url,
             rs.url_title,
             rs.to_peer,
             rs.manual_time,
             rs.begin_time,
             rs.reply_msg_count,
             rs.total_msg_count,
             rs.msg_count,
             rs.comment,
             rs.finish_reason,
             rs.finish_user,
             rs.end_time,
             rs.platform_description,
             rs.browser_name,
             rs.os_info,
             rs.area,
             rs.country,
             rs.province,
             rs.city,
             rs.creator,
             rs.name,
             rs.idcard,
             rs.phone,
             rs.itcast_school_id,
             rs.itcast_school,
             rs.itcast_subject_id,
             rs.itcast_subject,
             rs.wechat,
             rs.qq,
             rs.email,
             rs.gender,
             rs.level,
             rs.origin_type,
             rs.information_way,
             rs.working_years,
             rs.technical_directions,
             rs.customer_state,
             rs.valid,
             rs.anticipat_signup_date,
             rs.clue_state,
             rs.scrm_department_id,
             rs.superior_url,
             rs.superior_source,
             rs.landing_url,
             rs.landing_source,
             rs.info_url,
             rs.info_source,
             rs.origin_channel,
             rs.course_id,
             rs.course_name,
             rs.zhuge_session_id,
             rs.is_repeat,
             rs.tenant,
             rs.activity_id,
             rs.activity_name,
             rs.follow_type,
             rs.shunt_mode_id,
             rs.shunt_employee_group_id,
             IF(up.id IS NOT NULL AND rs.ends_time = '9999-12-31', up.update_date_time, rs.ends_time) AS ends_time,
             rs.starts_time
      FROM itcast_ods.customer_clue rs
               LEFT JOIN itcast_ods.customer_clue_update up ON rs.id = up.id
    ) his
ORDER BY his.id, his.ends_time;
"

# 五、临时表覆盖到拉链表
${HIVE_HOME} -e "
--分区
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
SET hive.exec.max.dynamic.partitions.pernode=10000;
SET hive.exec.max.dynamic.partitions=100000;
SET hive.exec.max.created.files=150000;
--hive压缩
SET hive.exec.compress.intermediate=true;
SET hive.exec.compress.output=true;
--写入时压缩生效
SET hive.exec.orc.compression.strategy=COMPRESSION;
--分桶
SET hive.enforce.bucketing=true;
SET hive.enforce.sorting=true;
SET hive.optimize.bucketmapjoin = true;
SET hive.auto.convert.sortmerge.join=true;
SET hive.auto.convert.sortmerge.join.noconditionaltask=true;
--覆盖写回数据
INSERT OVERWRITE TABLE itcast_ods.customer_clue PARTITION (starts_time)
SELECT *
FROM itcast_ods.customer_clue_tmp;
"
